In [8]:
import torch
import numpy as np

import matplotlib.pyplot as plt

In [5]:
from torchvision import datasets, transforms

Prepare data


In [6]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ])

# Download and load the training data
trainset = datasets.MNIST("MNIST_data/", download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!

In [7]:
dataiter = iter(trainloader)

images, labels = dataiter.next()

print(type(images))
print(images.shape)
print(labels.shape)


<class 'torch.Tensor'>
torch.Size([64, 1, 28, 28])
torch.Size([64])

In [11]:
plt.imshow(images[1].numpy().squeeze(), cmap="gray")


Out[11]:
<matplotlib.image.AxesImage at 0x2293c20e7f0>

In [35]:
def activation(x):
    return 1/(1 + torch.exp(-x))

features = images.view((images.shape[0], -1))

n_input = features.shape[1]
n_hidden  = 256
n_output = 10

W1 = torch.randn(n_input, n_hidden)
B1 = torch.randn(n_hidden)

W2 = torch.randn(n_hidden, n_output)
B2 = torch.randn(n_output)

H1 = activation(torch.mm(features, W1) + B1)
H2 = torch.mm(H1, W2) + B2

In [72]:
H1.shape, H2.shape


Out[72]:
(torch.Size([64, 256]), torch.Size([64, 10]))

In [73]:
def softmax(x):
    return torch.exp(x) / torch.sum(torch.exp(x), dim=1).view(-1, 1)

In [74]:
out = softmax(H2)

In [75]:
out.shape


Out[75]:
torch.Size([64, 10])

In [76]:
print(out.sum(dim=1))


tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000])

Building networks with PyTorch


In [77]:
from torch import nn

In [78]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # define transformations
        self.hidden = nn.Linear(784, 256)
        self.output = nn.Linear(256, 10)
        
        # define activations
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        # pass input tensor through each of our operations
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        x = self.softmax(x)
        
        return x

In [79]:
model = Network()

In [80]:
print(model)


Network(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): Softmax()
)

uses common operations in torch.nn.functional as F


In [82]:
import torch.nn.functional as F

In [83]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # define transformations
        self.hidden = nn.Linear(784, 256)
        self.output = nn.Linear(256, 10)
                
    def forward(self, x):
        # pass input tensor through each of our operations
        x = F.sigmoid(self.hidden(x))
        x = F.softmax(self.output(x))
        
        return x

In [84]:
model = Network()
print(model)


Network(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

Build a full network


In [90]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # define transformations
        self.hidden_1 = nn.Linear(784, 128)
        self.hidden_2 = nn.Linear(128, 64)        
        self.output = nn.Linear(64, 10)
                
    def forward(self, x):
        # pass input tensor through each of our operations
        x = F.relu(self.hidden_1(x))
        x = F.relu(self.hidden_2(x))
        x = F.softmax(self.output(x))
        
        return x

In [91]:
model = Network()
print(model)


Network(
  (hidden_1): Linear(in_features=784, out_features=128, bias=True)
  (hidden_2): Linear(in_features=128, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=10, bias=True)
)